/*******************************************************************************
Name: movers_analysis
*******************************************************************************/


* ---- Session setup ----
* Start with no data in memory, close any log still open from an earlier run
* (capture suppresses the error when none is open), then open this file's log.
clear
capture log close
log using "${logdir}7-movers_analysis.log", replace

* ---- Build the list of movers and move years ----
use "${sensdir}addr_problems_working.dta", clear

* Establishment-level indicators, constant within UNIT_NBR:
*   travel_flag = 1 when any row of the establishment has ok_traveltime == 0
*   ever_rezone = 1 when any row of the establishment has rezone == 1
egen travel_flag = max(ok_traveltime == 0), by(UNIT_NBR)
egen ever_rezone = max(rezone == 1), by(UNIT_NBR)

* Only rows flagged as a move are written to the list.
drop if move != 1

* Variables carried into the mover list, grouped by role.
local list_keys   UNIT_NBR year
local list_move   move move_yr pre_centralcity
local list_travel t_distance t_time
local list_flags  ever_rezone travel_flag ever_problem
keep `list_keys' `list_move' `list_travel' `list_flags'

* Intermediate file: merged back into the panel and then deleted further down.
save "${sensdir}movers_list.dta", replace

* ---- Open EEO-1 data ----
use "${sensdir}movers_working.dta", clear

* Restrict to the variables used below, grouped by role.
local estab_vars UNIT_NBR UNIT_NM HDQ_NBR P_NAME year
local place_vars addr CITY_NM CNTY_NM FIP2NM FIP5 msafips msa_name necma_name ///
	region division metarea city state group centralcity
local firm_vars  TOTAL* BLKT* SIC2 SIC3 fedcon firm_size
keep `estab_vars' `place_vars' `firm_vars'

* Rows without an establishment identifier cannot be matched or grouped.
keep if UNIT_NBR != ""

* ---- Merge in predicted black share (predictions based on distance from CBD) ----
* Old-style merge syntax: match on UNIT_NBR and year, require the using file to
* be unique on those keys, and sort both files first.
merge UNIT_NBR year using "${datadir}pblackpd_all.dta", sort uniqusing

* Keep only rows present in both files.
drop if _merge != 3
drop _merge

* ---- Merge in mover information ----
merge UNIT_NBR year using "${sensdir}movers_list.dta", sort uniqusing

* Log the match breakdown, then discard mover-list rows with no panel match.
tab _merge
drop if _merge == 2
drop _merge

* ---- Drop problematic establishments ----
* An establishment is removed entirely when any one of its rows has both
* ever_problem == 1 and travel_flag == 1.
tempvar problem_unit
egen `problem_unit' = max(ever_problem == 1 & travel_flag == 1), by(UNIT_NBR)
drop if `problem_unit' == 1
drop `problem_unit' ever_problem travel_flag

* Rows that did not match the mover list have move == . and count as non-moves.
replace move = 0 if move == .

* Spread pre_centralcity to every row of the establishment: 1 when any row has
* pre_centralcity == 1, otherwise 0.
tempvar any_pre_cc
egen `any_pre_cc' = max(pre_centralcity == 1), by(UNIT_NBR)
replace pre_centralcity = `any_pre_cc'
drop `any_pre_cc'

* Establishment-level maximum of move.
egen ever_move = max(move), by(UNIT_NBR)

* The intermediate mover list is no longer needed.
erase "${sensdir}movers_list.dta"


* Digits and spaces of the street address, with surrounding and repeated
* blanks collapsed. Used below as an alternative same-location check.
egen addr_num = sieve(addr), keep(num space)
replace addr_num = trim(itrim(addr_num))

* Flag the earliest-year row of each establishment.
bysort UNIT_NBR (year): gen first = (_n == 1)

sort UNIT_NBR year

* Keep only single-move establishments: count move rows per establishment and
* drop every establishment with more than one.
egen num_moves = sum((move == 1)), by(UNIT_NBR)
drop if num_moves > 1 & move != .

* ---- Post window ----
* Seed: the move-year row itself.
gen post = (move == 1)

* Extend forward. A row joins the post window when the row at the given offset
* above it is already in the window, belongs to the same establishment, shares
* either group or addr_num with it, and has the same metarea and HDQ_NBR.
* Offsets are applied in the order 1, 2, 3 and then 1 again.
foreach gap of numlist 1 2 3 1 {
	replace post = 1 if post[_n - `gap'] == 1 ///
		& UNIT_NBR[_n - `gap'] == UNIT_NBR ///
		& ((group[_n - `gap'] == group) | (addr_num[_n - `gap'] == addr_num)) ///
		& metarea[_n - `gap'] == metarea ///
		& HDQ_NBR[_n - `gap'] == HDQ_NBR
}

*PRE WINDOW
* Mirror image of the post window. Each establishment is ordered from its
* latest year to its earliest, so row _n - 1 is the next later observation.
gsort + UNIT_NBR - year

* Seed: the observation immediately before the move-year row, provided it
* belongs to the same establishment and has the same HDQ_NBR and metarea.
* The establishment check compares row _n - 1 with the current row, exactly as
* every other window rule does. It used to compare row _n - 1 with row _n + 1,
* which additionally required a still-earlier row of the same establishment;
* a mover whose only pre-move observation was its earliest row therefore never
* entered the pre window.
gen pre = 0
replace pre = 1 if move[_n - 1] == 1 & UNIT_NBR[_n - 1] == UNIT_NBR[_n] ///
	& HDQ_NBR[_n - 1] == HDQ_NBR[_n] & metarea[_n - 1] == metarea[_n]

* Extend backward in time. A row joins the pre window when the row at the given
* offset above it (a later year) is already in the window, belongs to the same
* establishment, shares either group or addr_num with it, and has the same
* metarea and HDQ_NBR. Offsets are applied in the order 1, 2, 3 and then 1.
foreach gap of numlist 1 2 3 1 {
	replace pre = 1 if pre[_n - `gap'] == 1 ///
		& UNIT_NBR[_n - `gap'] == UNIT_NBR[_n] ///
		& ((group[_n - `gap'] == group[_n]) | (addr_num[_n - `gap'] == addr_num[_n])) ///
		& metarea[_n - `gap'] == metarea[_n] ///
		& HDQ_NBR[_n - `gap'] == HDQ_NBR[_n]
}

* Restore chronological order within establishment.
sort UNIT_NBR year

* Largest metarea code seen inside each establishment's pre window and inside
* its post window. Rows outside the window contribute 0.
egen max_premetarea = max((pre == 1)*metarea), by(UNIT_NBR)
egen max_postmetarea = max((post == 1)*metarea), by(UNIT_NBR)

* ---- Restrict to establishments moving within a metro area ----
* Movers survive only when the pre- and post-window metro codes agree.
keep if max_premetarea == max_postmetarea | ever_move != 1

drop max_premetarea max_postmetarea

* Non-movers survive only when metarea is constant across all of their rows.
egen max_metarea = max(metarea), by(UNIT_NBR)
egen min_metarea = min(metarea), by(UNIT_NBR)

keep if max_metarea == min_metarea | ever_move != 0

* Last year inside the pre window. Set to missing for non-movers.
egen max_preyear = max((pre == 1)*year), by(UNIT_NBR)
replace max_preyear = . if ever_move == 0

* A row is in the event window when it is in either the pre or post window.
gen window = (post == 1 | pre == 1)

* ---- Keep only establishments with enough employees ----
* init_size is TOTAL10 in the establishment's earliest remaining year.
egen min_year = min(year), by(UNIT_NBR)
egen init_size = max((min_year == year)*TOTAL10), by(UNIT_NBR)
drop if init_size < 50
drop if TOTAL10 <= 0

/*------------------------------------------------------------
*ESTIMATE EVENT STUDY MODELS
------------------------------------------------------------*/

* Numeric establishment identifier.
egen unit_id = group(UNIT_NBR)

* Year of the move. Non-move rows contribute the sentinel 10000 so the minimum
* picks the earliest move year. Establishments that never move get 0.
egen first_move_yr = min(year*(move == 1) + 10000*(move == 0)), by(unit_id)
replace first_move_yr = 0 if ever_move == 0

* Lead indicators: move_Fk = 1 exactly k years before the move, and move_F6p
* pools six or more years before.
gen move_F6p = (first_move_yr - year >= 6)
forvalues k = 6(-1)1 {
	gen move_F`k' = (first_move_yr - year == `k')
}

* Move year itself.
gen first_move = (first_move_yr == year)

* Lag indicators: move_Lk = 1 exactly k years after the move, and move_L6p
* pools six or more years after. The first_move_yr != 0 guard keeps non-movers
* out of the pooled lag.
gen move_L6p = (year - first_move_yr >= 6) & first_move_yr != 0
forvalues k = 6(-1)1 {
	gen move_L`k' = (year - first_move_yr == `k')
}

* ---- Define event times ----
* Single categorical version of the same timing:
*   1..5 = six..two years before the move
*   6    = every other row (the default)
*   7    = move year
*   8..12 = one..five years after, 13 = six or more years after
gen event_time = 6

forvalues k = 2/6 {
	replace event_time = 7 - `k' if (first_move_yr - year == `k')
}

replace event_time = 7 if (first_move_yr - year == 0)

forvalues k = 1/5 {
	replace event_time = 7 + `k' if (year - first_move_yr == `k')
}
replace event_time = 13 if (year - first_move_yr >= 6) & first_move_yr != 0

* Linear time trend that applies to movers only, measured from 1971.
gen mover_slope = (year - 1971)*(ever_move == 1)

* ---- Classify balanced panel of movers ----
* Count the in-window rows falling in event years -4 through +5. An
* establishment is balanced when all ten are present.
local bal_periods move_F4 + move_F3 + move_F2 + move_F1 + first_move ///
	+ move_L1 + move_L2 + move_L3 + move_L4 + move_L5
egen balanced_temp = sum(window*(`bal_periods')), by(unit_id)
gen balanced = (balanced_temp == 10)

* Rows of balanced establishments that lie in event years -4 through +5.
gen balsamp = balanced & inrange(year - first_move_yr, -4, 5)

* Establishment size, its log, and the black share of employment, all built
* from the TOTAL10 and BLKT10 counts.
gen size = TOTAL10

gen ln_size = ln(TOTAL10)
gen f_black = BLKT10/TOTAL10

* Log of firm size.
gen ln_firmsize = ln(firm_size)

* ---- Identify one-digit SIC code ----
* SIC1 takes the lower bound of the two-digit band that SIC2 falls in, except
* that everything up to 9 is coded 7. SIC2 values above 89 stay missing.
gen SIC1 = .

replace SIC1 = 7 if SIC2 <= 9
foreach band in "10 14" "15 19" "20 39" "40 49" "50 51" "52 59" "60 69" "70 89" {
	gettoken lo hi : band
	replace SIC1 = `lo' if inrange(SIC2, `lo', `hi')
}

* Keep only metro areas that contain at least one in-window row of a mover.
egen keep_metarea = max((ever_move == 1)*(window == 1)), by(metarea)
drop if keep_metarea != 1

* ---- Calculate move distance ----
* dist in the last pre-window year and in the move year, per establishment.
* The change is defined for movers only.
egen predist = max((year == max_preyear)*dist), by(unit_id)
egen postdist = max((move == 1) * dist), by(unit_id)

gen ddist = postdist - predist if ever_move

summarize ddist if move == 1 & pre_centralcity, detail

* Indicator for a change in dist below 7.5. Missing when ddist is missing.
gen short = ddist < 7.5 if ddist != .

* ---- Summarize travel times ----
* Spread each mover's maximum t_time and t_distance to all of its rows.
foreach tv of varlist t_time t_distance {
	tempvar unit_max
	egen `unit_max' = max(`tv'), by(UNIT_NBR)
	replace `tv' = `unit_max' if ever_move == 1
	drop `unit_max'
}

* One row per mover (the row flagged first), split by pre_centralcity status.
foreach tv of varlist t_time t_distance {
	foreach cc in 1 0 {
		summarize `tv' if first & ever_move == 1 & pre_centralcity == `cc', detail
	}
}

/*------------------------------------------------------------
*DESCRIPTIVE STATISTICS
------------------------------------------------------------*/

* ---- Central-city establishments that never move ----
local cc_stayers "ever_move == 0 & centralcity == 1"

tabulate first if `cc_stayers'

foreach v of varlist size f_black pblack_n {
	summarize `v' if `cc_stayers'
}
tabulate SIC1 if `cc_stayers'
summarize dist if `cc_stayers'

* ---- Central-city rows of establishments that move ----
tabulate first if ever_move == 1 & centralcity == 1

* In-window rows of establishments with pre_centralcity == 1 whose change in
* dist is at least 5.
* NOTE(review): Stata orders missing above every number, so ddist >= 5 is also
* true when ddist is missing. Confirm that no in-window row has missing ddist.
gen mover_sample = window == 1 & pre_centralcity == 1 & ddist >= 5

* Mover characteristics in the last pre-window year.
local mover_pre "mover_sample == 1 & year == max_preyear"

foreach v of varlist size f_black pblack_n {
	summarize `v' if `mover_pre'
}
tabulate SIC1 if `mover_pre'
foreach v of varlist dist ddist {
	summarize `v' if `mover_pre'
}


/*----------------------------------------------------------------------*/
/*----------------------------------------------------------------------*/
*FIGURE 2: BLACK SHARE DROPS FOLLOWING ESTABLISHMENT RELOCATIONS TO SUBURBS
/*----------------------------------------------------------------------*/
/*----------------------------------------------------------------------*/


/*------------------------------------------------------------
*BASELINE RESULTS
------------------------------------------------------------*/

* Black share of employees in percent.
gen pblack = f_black*100

* Group identifiers for division-by-year and division-by-industry-by-year cells.
egen divXyear = group(division year)
egen divXindXyear = group(division SIC1 year)

* Shared pieces of every specification in this section.
*   treat : in-window rows of movers with pre_centralcity == 1 and ddist >= 5
*   ctrl  : central-city rows of establishments that never move
*   fe_se : division-by-industry-by-year and establishment effects absorbed,
*           standard errors clustered by establishment
local treat "window == 1 & pre_centralcity == 1 & ddist >= 5"
local ctrl  "centralcity == 1 & ever_move == 0"
local fe_se "absorb(divXindXyear unit_id) vce(cluster unit_id)"
local leads "move_F5 move_F4 move_F3 move_F2"
local lags  "first_move move_L1 move_L2 move_L3 move_L4 move_L5"

* ---- Baseline ----
* Indicator version. Six or more years before the move is pooled in move_F6p
* and move_F1 is left out.
reghdfe pblack_n move_F6p `leads' `lags' move_L6p ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

* Composition of the estimation sample.
tabulate move if e(sample)
tabulate year if first_move == 1 & e(sample)
foreach y of varlist pblack_n pblack {
	summarize `y' if e(sample) & year == max_preyear
}

* Factor-variable version for plotting. Rows six or more years before the move
* are temporarily pooled into event_time level 1. Level 6 is the base (ib6).
replace event_time = 1 if (first_move_yr - year >= 6)

reghdfe pblack_n ib6.event_time ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

estimates store baseline

* Undo the pooling: rows more than six years before the move return to level 6.
replace event_time = 6 if (first_move_yr - year > 6)

* ---- Linear trend for movers ----
* Adds mover_slope and uses move_F6 (exactly six years before) instead of the
* pooled move_F6p.
reghdfe pblack_n mover_slope move_F6 `leads' `lags' move_L6p ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

tabulate move if e(sample)
tabulate year if first_move == 1 & e(sample)
foreach y of varlist pblack_n pblack {
	summarize `y' if e(sample) & year == max_preyear
}

reghdfe pblack_n mover_slope ib6.event_time ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

estimates store trend

* Event-study plot of the two stored factor-variable models.
coefplot baseline trend, vertical drop(mover_slope ln_size _cons) yline(0) color(gs4 gs10) ///
	coeflabels(1.event_time = "-6" 2.event_time = "-5" 3.event_time = "-4" ///
	4.event_time = "-3" 5.event_time = "-2" 6.event_time = "-1" ///
	7.event_time = "0" 8.event_time = "1" 9.event_time = "2" ///
	10.event_time = "3" 11.event_time = "4" 12.event_time = "5" 13.event_time = "6+") ///
	xtitle("Years Since Relocation") ytitle("Normalized Black Share of Employees") ///
	p1(label(Baseline) lcolor(gs4) lpattern(solid)) p2(label(Linear Trend) lcolor(gs10) lpattern(dash)) baselevels ///
	recast(line line) ciopts(recast(rcap rcap)) ///
	plotregion(fcolor(white)) graphregion(fcolor(white))

graph save "${outdir}relocate_evstudy.gph", replace
graph export "${outdir}relocate_evstudy.pdf", replace

* ---- Balanced panel ----
* Movers are further limited to balsamp rows. Only event years -4 through +5
* enter, with move_F1 left out.
reghdfe pblack_n move_F4 move_F3 move_F2 `lags' ln_size ///
	if (`treat' & balsamp == 1) | (`ctrl'), `fe_se'

tabulate move if e(sample)
tabulate year if first_move == 1 & e(sample)
foreach y of varlist pblack_n pblack {
	summarize `y' if e(sample) & year == max_preyear
}


/*------------------------------------------------------------
*PREDICTED BLACK SHARE (LOCATION BASED)
------------------------------------------------------------*/

* Same three specifications as the baseline section, with pblackpd as the
* outcome.
*   treat : in-window rows of movers with pre_centralcity == 1 and ddist >= 5
*   ctrl  : central-city rows of establishments that never move
*   fe_se : divXindXyear and unit_id absorbed, errors clustered by unit_id
local treat "window == 1 & pre_centralcity == 1 & ddist >= 5"
local ctrl  "centralcity == 1 & ever_move == 0"
local fe_se "absorb(divXindXyear unit_id) vce(cluster unit_id)"
local leads "move_F5 move_F4 move_F3 move_F2"
local lags  "first_move move_L1 move_L2 move_L3 move_L4 move_L5"

* ---- Baseline: six or more years before the move pooled in move_F6p ----
reghdfe pblackpd move_F6p `leads' `lags' move_L6p ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

* ---- Linear trend for movers: mover_slope added, move_F6 replaces move_F6p ----
reghdfe pblackpd mover_slope move_F6 `leads' `lags' move_L6p ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

* ---- Balanced panel: movers limited to balsamp rows ----
reghdfe pblackpd move_F4 move_F3 move_F2 `lags' ln_size ///
	if (`treat' & balsamp == 1) | (`ctrl'), `fe_se'
	
	
/*------------------------------------------------------------
*PREDICTED BLACK SHARE (OCCUPATION BASED)
------------------------------------------------------------*/

* pblack_pocc accumulates, over count categories 1 through 9, the
* establishment's share of employment in the category multiplied by the
* leave-one-out black share of that category in its division, SIC1 and year
* cell.
gen pblack_pocc = 0

forvalues occ = 1/9 {

	* Cell totals of all employees and of black employees in this category.
	egen total_occ`occ' = sum(TOTAL`occ'), by(division SIC1 year)
	egen total_occ`occ'_black = sum(BLKT`occ'), by(division SIC1 year)

	* Leave-one-out black share: the establishment's own counts are removed
	* from both the numerator and the denominator.
	gen occ`occ'_share_black = (total_occ`occ'_black - BLKT`occ')/(total_occ`occ' - TOTAL`occ')

	* Share of the establishment's TOTAL10 that is in this category.
	gen occ`occ'_share = TOTAL`occ'/TOTAL10

	replace pblack_pocc = pblack_pocc + occ`occ'_share*occ`occ'_share_black
}

* Shared pieces of every specification in this section.
*   treat : in-window rows of movers with pre_centralcity == 1 and ddist >= 5
*   ctrl  : central-city rows of establishments that never move
*   fe_se : divXindXyear and unit_id absorbed, errors clustered by unit_id
local treat "window == 1 & pre_centralcity == 1 & ddist >= 5"
local ctrl  "centralcity == 1 & ever_move == 0"
local fe_se "absorb(divXindXyear unit_id) vce(cluster unit_id)"
local leads "move_F5 move_F4 move_F3 move_F2"
local lags  "first_move move_L1 move_L2 move_L3 move_L4 move_L5"

* ---- Baseline ----
reghdfe pblack_pocc move_F6p `leads' `lags' move_L6p ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

* Factor-variable version for plotting. Rows six or more years before the move
* are temporarily pooled into event_time level 1. Level 6 is the base (ib6).
replace event_time = 1 if (first_move_yr - year >= 6)

reghdfe pblack_pocc ib6.event_time ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

estimates store baseline

* Undo the pooling: rows more than six years before the move return to level 6.
replace event_time = 6 if (first_move_yr - year > 6)

* ---- Linear trend for movers ----
reghdfe pblack_pocc mover_slope move_F6 `leads' `lags' move_L6p ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

reghdfe pblack_pocc mover_slope ib6.event_time ln_size ///
	if (`treat') | (`ctrl'), `fe_se'

estimates store trend

* Event-study plot of the two stored factor-variable models.
coefplot baseline trend, vertical drop(mover_slope ln_size _cons) yline(0) ///
	coeflabels(1.event_time = "-6" 2.event_time = "-5" 3.event_time = "-4" ///
	4.event_time = "-3" 5.event_time = "-2" 6.event_time = "-1" ///
	7.event_time = "0" 8.event_time = "1" 9.event_time = "2" ///
	10.event_time = "3" 11.event_time = "4" 12.event_time = "5" 13.event_time = "6+") ///
	xtitle("Years Since Relocation") ytitle("Predicted Black Share of Employees") ///
	p1(label(Baseline) lcolor(gs4) lpattern(solid)) p2(label(Linear Trend) lcolor(gs10) lpattern(dash)) baselevels ///
	recast(line line) ciopts(recast(rcap rcap)) ///
	plotregion(fcolor(white)) graphregion(fcolor(white)) ///
	yscale(r(-0.025 0.01)) ylabel(-0.025(0.005)0.01, gmin gmax)

graph save "${outdir}relocate_evstudy_pocc.gph", replace
graph export "${outdir}relocate_evstudy_pocc.pdf", replace

* ---- Balanced panel: movers limited to balsamp rows ----
reghdfe pblack_pocc move_F4 move_F3 move_F2 `lags' ln_size ///
	if (`treat' & balsamp == 1) | (`ctrl'), `fe_se'


log close
